{
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "metadata": {},
     "outputs": [],
     "input": [
      "import pandas as pd\n",
      "from twitterSentiment import models\n",
      "import nltk\n",
      "import re"
     ],
     "language": "python",
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "metadata": {},
     "outputs": [],
     "input": [
      "# Load every TwitterText row into a pandas DataFrame.\n",
      "from django_pandas.io import read_frame   # requires django-pandas: https://github.com/chrisdev/django-pandas\n",
      "df = read_frame(models.TwitterText.objects.all())"
     ],
     "language": "python",
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "metadata": {},
     "outputs": [],
     "input": [
      "def remove_urls(text):\n",
      "    matched_url = \"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))\"\n",
      "    matched_username = \"@+[\\w_\\:]+\"\n",
      "    #return (re.sub(matched_url,\"\",text))\n",
      "    return (re.sub(matched_url+\"|\"+matched_username,\"\",text))\n",
      "# Clean every tweet (URLs and @usernames stripped) and keep plain Python lists\n",
      "tweets = df['twitter_text'].map(remove_urls).tolist()\n",
      "keywords = df['twitter_text_keyword'].tolist()"
     ],
     "language": "python",
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "20690\n",
        "[('USO($USO),SQQQ($SQQQ),Transocean Ltd($RIG),Apple Inc.($AAPL),ARO($ARO),DXD($DXD),USO($USO),Peabody Energy($BTU),Valero Energy($VLO)', 531), ('Apple Inc.($AAPL),BABA($BABA),Amazon.com Inc($AMZN),FNMA($FNMA)', 511), ('Apple Inc.($AAPL)', 207), ('PLKD($PLKD),STTK($STTK),FVRG($FVRG),CLF($CLF),BABA($BABA),Apple Inc.($AAPL)', 205), ('wag($wag)', 153), ('BlackBerry Limited($BBRY)', 98), ('General Motors Company($GM)', 60), ('TG Therapeutics, Inc.($TGTX)', 43), ('Goldman Sachs Group($GS)', 42), ('Cubist Pharmaceuticals, Inc.($CBST)', 36), ('DGLY($DGLY),Gilead Sciences($GILD),UGAZ($UGAZ),BIOGEN IDEC Inc.($BIIB),Mastercard Inc.($MA)', 35), ('Google Inc.($GOOG)', 32), ('0($0),EOG Resources($EOG),XLE($XLE),JNUG($JNUG),Dollar General Corp($DG),ERX($ERX)', 32), ('0($0),Avago Technologies Ltd($AVGO),United Health Group Inc.($UNH),TLM($TLM),MPEL($MPEL),Under Armour Inc A($UA)', 28), ('Bristol-Myers Squibb($BMY)', 28), ('Starbucks Corp.($SBUX)', 28), ('Citigroup Inc.($C)', 28), ('aapl($aapl)', 27), ('Facebook Inc($FB)', 27), ('c($c)', 26), ('Amazon.com Inc($AMZN)', 23), ('Gilead Sciences($GILD)', 22), ('Walt Disney Co.($DIS)', 21), ('ASNA($ASNA),Visa Inc.($V),TASR($TASR),Verizon Communications($VZ),OVTI($OVTI),GDP($GDP),SWIR($SWIR),VTAE($VTAE),EYES($EYES),IBB($IBB)', 21), ('AREX($AREX),AGIO($AGIO),Chevron Corp.($CVX),FCAU($FCAU),PerkinElmer($PKI),UAL($UAL)', 21), ('Bristol-Myers Squibb($BMY),Merck & Co.($MRK)', 20), ('HIMX($HIMX),SCTY($SCTY),Halliburton Co.($HAL),UUP($UUP),CRUS($CRUS),Express Scripts($ESRX)', 20), ('312($312),Johnson & Johnson($JNJ)', 20), ('Celgene Corp.($CELG),CDW($CDW),SFY($SFY),AMBA($AMBA),The Bank of New York Mellon Corp.($BK)', 20), ('NetFlix Inc.($NFLX)', 20)]\n",
        "20690\n",
        "[('RT  December Portfolio Review with Adjustments to $USO, $SQQQ, $RIG, $AAPL, $ARO, $DXD, $USO, $BTU and $VLO -- .co/…', 531), ('RT  Stock Analyst Finds Best Options Strategies $AAPL $BABA $AMZN $FNMA .co/bI14g1rdlT', 342), ('RT  Stock Analyst Issues New Reports and Opinions on Top Stocks $PLKD $STTK $FVRG $CLF $BABA $AAPL .co/HJAg43ggPM', 168), ('RT  Stock Analyst Finds Best Options Strategies $AAPL $BABA $AMZN $FNMA .co/5mkb4YwJbk', 167), ('RT  $wag .co/DkWTsMPJIc', 98), ('RT  Stock Analyst Issues New Reports and Opinions on Top Stocks $PLKD $STTK $FVRG $CLF $BABA $AAPL .co/Hj79m7RvOT', 36), (' Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? Check it out 10,000 + Views', 22), ('RT  The US owns 261,498,926.230 oz. of gold valued at  $312 billion, about the market cap of Johnson &amp; Johnson. $JNJ\\nhttp:/…', 19), ('RT  A couple hours of DETAILED secrets to making BIG MONEY on $AAPL #trades over and over again! .co/NXm9DOrH7X', 14), ('RT  .co/nmJIt67zoC $dkts $stz $deo. #tequila #drinktopshelf #ComingSoon   #subpenny  #.005 imo', 13), ('#Nasdaq100 #recent #market #exit #2: Covered $YHOO short for a 0.79% #gain in 11 days. #YHOO #forex #trading #stocks', 11), ('RT  #ASH14 $BMY $MRK - Blood paper on 9p24.1 &amp; JAK2 amplification → PD-1 ligand ↑expression in HL\\n.co/pVaqKFoQIp http:…', 11), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/lbZzSo6C44 TASR $EOG $XLE $JNUG $DG $ERX', 11), ('Tip #4 How to identify a HOT SECTOR? .co/cFuOlMXLZp #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 11), ('RT  Movers and Shakers: Making Range Advances: $ISNS $BTU $AMBA $ZION $BKW $GILD $STI $RF $FRAN $SIMG $ARRY $C $SWHC $VIAB …', 10), ('  Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? Check it out 10,000 + Views', 10), ('.co/OM9hSaF2XI &lt;-- 2 stocks trading books: 1 entertaining and 1 educational! Both awesome! 
$TLT $SYNA $AAPL $FEYE $FB $BAC', 9), ('RT  #imagine \"Do I have enough $wag for you Y/N?\" Calum sends you. .co/Wx7HFHDDaM', 9), ('[Case Study] How A 24 yr old student Made $0.48 Million In 8 Months With Stocks .co/Ek8S5gUhfU $AGIO $CVX $FCAU $PKI $UAL', 9), ('RT  Apple among 5 big IBD 50 winners from much-cheaper oil .co/lCy6WHi00o  $AAPL $SAVE $CMG .co/I5fWx6wdpt', 9), ('Tip #4 How to identify a HOT SECTOR? .co/0CrdLCphOY #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 9), ('RT  Why Ford shares are poised for a Santa Claus rally of their own: .co/bEsgD32JLk $F .co/fS1jDJwIGw', 9), (' Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? 10,000 + Views', 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/Fv2ZjiN52T TASR $EOG $XLE $JNUG $DG $ERX', 9), (\"RT  Our  has begun his predictions for 2015. Here's Part 1: .co/gsCkTzL9Vu $FB $TWTR $NFLX $AAPL $GOOG $Y…\", 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/lbZzSo6C44 $AVGO $UNH $TLM $MPEL $UA', 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/Ek8S5gUhfU $AVGO $UNH $TLM $MPEL $UA', 8), ('Tip #4 How to identify a HOT SECTOR? .co/ewe2BEND4R #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 8), ('RT  $BBRY here is the 60 minutes VIDEO Disrupting Cancer - CBS News .co/Gu81E5V9Ik', 8), ('RT  Apple pivoting iPad education strategy to regain its footing .co/dgFVtJ7EhH $AAPL', 8)]\n"
       ]
      }
     ],
     "input": [
      "# Size of the keyword list and its 30 most frequent values\n",
      "print (len(keywords))\n",
      "print (nltk.FreqDist(keywords).most_common(30))\n",
      "# Same for the cleaned tweets. Note: FreqDist is fed whole tweet strings,\n",
      "# so `commonwords` counts repeated tweets, not individual words.\n",
      "print (len(tweets))\n",
      "commonwords=nltk.FreqDist(tweets)\n",
      "print (commonwords.most_common(30))"
     ],
     "language": "python",
     "prompt_number": 11
    }
   ]
  }
 ],
 "cells": [],
 "metadata": {
  "name": "",
  "signature": "sha256:f2e2e023965975a5b2c1295e286fb4c3f8f68c8a2c792167d490df4710154240"
 },
 "nbformat": 3,
 "nbformat_minor": 0
}